#!/usr/bin/env python
# coding: utf-8

# In[1]:


# Bootstrap: run the project's path setup and the shared GasStation helper
# script inside this namespace.  Names used below that are not defined in this
# file (P_Lib, P_GS_Data, load_obj, read_hdf, merge, concat, Pool, Series, ...)
# are presumably provided by these two scripts -- confirm against them.
with open('init_path.py') as _fh:  # close the handle instead of leaking it
    exec(_fh.read())
exec((P_Lib/'GasStation.py').read_text())

# `%matplotlib inline` only exists inside IPython/Jupyter.  Skip it when this
# exported notebook is executed as a plain Python script.
try:
    _ipython = get_ipython()
except NameError:
    _ipython = None
if _ipython is not None:
    _ipython.run_line_magic('matplotlib', 'inline')


# ### Raw Response Time  

# In[27]:


# fuel = 'e10'
# fuel = 'e5'
#fuel = 'diesel'
# Inclusive hour-of-day window: only price changes whose hour satisfies
# h_min <= hour <= h_max are analysed.
h_min, h_max = 7, 21
# h_min = 0; h_max = 6
# Fuel price columns for which the daily response files are produced.
fuels = ['e5', 'e10', 'diesel']

# In[28]:


# Mapping station id -> rival station id (it is applied to StID values via
# Series.map further down).  The file name suggests the different-brand
# variant of the mapping.
# dict_stid_to_rival = load_obj(P_GS_Data / 'GS' / 'dict_stid_to_rival.pkl')
dict_stid_to_rival = load_obj(P_GS_Data / 'GS' / 'dict_stid_to_rival_DiffBrand_perm.pkl')
# Station ids that have a rival assigned (the keys of the mapping).
ls_rivals = list(dict_stid_to_rival.keys())


# In[29]:
def calc_daily_response(f):
    """Return each station's mean response time to its rival for one day.

    Reads the module-level ``fuel``, ``ls_rivals``, ``dict_stid_to_rival``,
    ``h_min`` and ``h_max``.  ``fuel`` is assigned by the loop below before
    the worker pool for that fuel is created.

    Parameters
    ----------
    f : pathlib.Path
        Daily HDF5 file (key ``'GS'``) containing at least the columns
        ``StID``, ``Time`` and the current ``fuel`` price column.

    Returns
    -------
    Series
        Mean response time in minutes, indexed by station id (``Me``) and
        named after the file stem.
    """
    df = read_hdf(f, 'GS')
    # df = df[df.StID.isin([9,208,13,4426])]
    df = df[df.StID.isin(ls_rivals)]
    df = df[df[fuel] > 0].copy()
    # Keep rows whose price differs from the station's previous row.  The
    # first row of each station compares against NaN and is therefore kept.
    df = df[df[fuel] != df.groupby('StID')[fuel].shift(1)].copy()
    df['H'] = df.Time.dt.hour
    df = df[(df.H >= h_min) & (df.H <= h_max)]

    # Pair every own price change (Me) with every change of its rival.
    Me = df[['StID', 'Time']].rename(columns={'StID': 'Me', 'Time': 'ResponseTime'})
    Me['Rival'] = Me.Me.map(dict_stid_to_rival)
    Rival = df[['StID', 'Time']].rename(columns={'StID': 'Rival', 'Time': 'RivalTime'})
    df = merge(Me, Rival, on='Rival')  # 'Rival' is the only shared column
    df = df[df.ResponseTime >= df.RivalTime].copy()  # own change at/after the rival's
    df['Rival'] = df.Rival.astype(int)

    # For every rival change keep the earliest own change that follows it.
    df = (df.sort_values(by=['Rival', 'RivalTime', 'ResponseTime'])
            .groupby(['Rival', 'RivalTime', 'Me']).ResponseTime.first()
            .reset_index())
    # total_seconds() keeps the day component of the timedelta, which
    # `.dt.seconds` silently discards.
    df['Response'] = (df.ResponseTime - df.RivalTime).dt.total_seconds() / 60
    df_daily_response = df.groupby('Me').Response.mean()
    df_daily_response.name = f.stem
    return df_daily_response


# The list of input days does not depend on the fuel, so build it once.
files = sorted((P_GS_Data_Raw / 'PH_Day').glob('*.h5'))
print(len(files))
ls_strYMD_weekday_nonholiday = load_obj(P_GS_Data / 'GS' / 'ls_strYMD_weekday_nonholiday.pkl')
files = [f for f in files if f.stem in ls_strYMD_weekday_nonholiday]
print(len(files))

# NOTE(review): the workers read the module-level `fuel`; this assumes the
# pool's worker processes inherit this module's globals (fork start method)
# -- confirm on platforms that spawn workers instead.
for fuel in fuels:
    with Pool(4) as p:
        ls = p.map(calc_daily_response, files)

    # One column per day -> long format (station, day, response).  The
    # explicit dropna() guarantees the int casts below never see NaN for
    # station/day combinations without data.
    df = concat(ls, axis=1).stack().dropna().reset_index()
    df.columns = ['StID', 'YMD', 'Response']
    df['Response'] = df.Response.astype(int)  # truncates to whole minutes
    df['YMD'] = df.YMD.astype(int)
    df = df.sort_values(['StID', 'YMD']).reset_index(drop=True)

    df.to_hdf(P_GS_Data / 'PCResponse' / ('rival_diffbrand_daily_weekday_nonholiday_'+fuel+'.h5'),
              key='GS', mode='w', complevel=9, complib='blosc')
    df.to_stata(P_GS_Data / 'PCResponse' / ('rival_diffbrand_daily_weekday_nonholiday_'+fuel+'.dta'))



# Settings for the response-count analysis: price column to analyse and the
# prefix used in the exported file name.
fuel = 'e5'
marker = 'rival_res'

# Distance Based Rival
dict_stid_to_rival = load_obj(P_GS_Data / 'GS' / 'dict_stid_to_rival_DiffBrand_perm.pkl')
# Same mapping as a two-column frame: O = dict key (station), D = dict value (its rival).
GSRival = Series(dict_stid_to_rival).reset_index()
GSRival.columns = ['O', 'D']
ls_rivals = list(dict_stid_to_rival.keys())

# In[6]:


# Every daily file, ordered by file name.
files = sorted((P_GS_Data_Raw / 'PH_Day').glob('*.h5'))
print(len(files))

# Keep only days listed in the weekday / non-holiday calendar.
ls_strYMD_weekday_nonholiday = load_obj(P_GS_Data / 'GS' / 'ls_strYMD_weekday_nonholiday.pkl')
files = [f for f in files if f.stem in ls_strYMD_weekday_nonholiday]
print(len(files))

# Keep only days whose YYYYMMDD stem lies in 2016-01-01 .. 2018-12-31 (inclusive).
files = [f for f in files if 20160101 <= int(f.stem) <= 20181231]
print(len(files))


# Upper bound (exclusive), in minutes, on the delay between a rival's price
# change and a station's own change for the latter to count as a response.
rival_price_response_duration_threshold = 120


def count_npc_in_response_to_rival(f):
    """Count one day's price changes and rival responses per station pair.

    Reads the module-level ``fuel``, ``GSRival``, ``dict_stid_to_rival``,
    ``h_min``, ``h_max`` and ``rival_price_response_duration_threshold``.

    Parameters
    ----------
    f : pathlib.Path
        Daily HDF5 file (key ``'GS'``) with at least the columns ``StID``,
        ``Time`` and the ``fuel`` price column; ``f.stem`` must be an
        integer-like date string because it is stored as ``intYMD``.

    Returns
    -------
    DataFrame
        Columns ``Me``, ``nRes``, ``nPCMe``, ``nPCRival``, ``intYMD``; one row
        per station whose own and whose rival's price-change counts are both
        positive.  ``nRes`` is the number of own price changes that happened
        at, or less than the threshold minutes after, a rival's change; each
        own change is counted once.
    """
    df = read_hdf(f, 'GS')

    # Restrict to pairs where both the station and its rival appear in this file.
    # df = df.loc[df.StID.isin([9,208,13,4426]), ['StID','Time',fuel]].copy()
    ls_stids = df.StID.unique().tolist()
    GSRival_ymd = GSRival[GSRival.O.isin(ls_stids) & GSRival.D.isin(ls_stids)]
    # NOTE(review): only stations that are keys (O) are kept, so a rival's
    # changes are visible only if the rival is itself a key -- confirm that
    # the mapping covers every rival.
    ls_stids = GSRival_ymd.O.tolist()
    df = df.loc[df.StID.isin(ls_stids), ['StID', 'Time', fuel]].copy()
    df = df[df[fuel] != -1].copy()  # -1 rows are dropped; presumably a missing-price marker

    # Price change relative to the station's previous row.  The first row of
    # each station has no predecessor (NaN) and is discarded.
    df['PC'] = df[fuel] - df.groupby('StID')[fuel].shift(1)
    df = df[(df.PC != 0) & df.PC.notnull()].copy()
    df['H'] = df.Time.dt.hour
    df = df[(df.H >= h_min) & (df.H <= h_max)]

    dict_stid_to_npc = df.groupby('StID').H.count().to_dict()  # PC Count

    # Count nRes: pair every own change with every change of the rival.
    Me = df[['StID', 'Time']].rename(columns={'StID': 'Me', 'Time': 'ResponseTime'})
    Me['Rival'] = Me.Me.map(dict_stid_to_rival)
    Rival = df[['StID', 'Time']].rename(columns={'StID': 'Rival', 'Time': 'RivalTime'})
    df = merge(Me, Rival, on='Rival')  # 'Rival' is the only shared column
    df = df[df.ResponseTime >= df.RivalTime].copy()  # Only ResponseTime>=RivalTime
    df['Rival'] = df.Rival.astype(int)
    # total_seconds() keeps the day component; `.dt.seconds` would treat a
    # delay of one day plus ten minutes as ten minutes.
    delay_min = (df.ResponseTime - df.RivalTime).dt.total_seconds() / 60
    df = df[delay_min < rival_price_response_duration_threshold].copy()
    # Several rival changes may precede one own change; count that own change once.
    df = (df.sort_values(by=['Rival', 'RivalTime', 'ResponseTime'])
            .drop_duplicates(['Me', 'ResponseTime'], keep='last'))
    nres_by_stid = df.groupby('Me').ResponseTime.count()

    # Group all data together, one row per (Me, Rival) pair present in this file.
    df = GSRival_ymd.copy()
    df.columns = ['Me', 'Rival']
    df['nRes'] = df.Me.map(nres_by_stid)
    df['nPCMe'] = df.Me.map(dict_stid_to_npc)
    df['nPCRival'] = df.Rival.map(dict_stid_to_npc)
    df = df.fillna(0).astype(int)  # stations without changes/responses get 0
    df['intYMD'] = int(f.stem)
    has_changes = (df.nPCMe > 0) & (df.nPCRival > 0)
    return df.loc[has_changes, ['Me', 'nRes', 'nPCMe', 'nPCRival', 'intYMD']].copy()

# Per-day counts for every selected file, processed sequentially.
ls = [count_npc_in_response_to_rival(day_file) for day_file in files]


# In[79]:


# Stack the daily frames and release the list of per-day pieces.
df = concat(ls)
del ls
print(df.head(2))


# In[80]:


# Attach y2kW to every row by looking up its intYMD in the shared date table.
# y2kW is treated further down as a consecutive integer week index.
YMD = read_hdf(P_Research / 'Others' / 'DateTime' / 'utils_dates_YMD.h5', 'utils')
dict_intYMD_to_y2kW = YMD.set_index('intYMD')['y2kW'].to_dict()
df['y2kW'] = df['intYMD'].map(dict_intYMD_to_y2kW)
df.head(2)  # notebook preview; has no effect when run as a script


# In[81]:


# Collapse the daily rows to one row per station and week, holding the mean
# of each daily count.
by_station_week = df.rename(columns={'Me': 'StID'}).groupby(['StID', 'y2kW'])
df = by_station_week[['nRes', 'nPCMe', 'nPCRival']].mean().reset_index()
df.head(2)  # notebook preview; has no effect when run as a script


# In[82]:


# remove those w/o any variations (nRes sums to zero over all weeks)
s = df.groupby('StID')['nRes'].sum()
stids_with_response = s.index[s > 0].tolist()
df = df[df['StID'].isin(stids_with_response)]

# remove those with week gaps: a station is gap-free when its number of
# distinct weeks equals (last week - first week + 1)
gb = df.groupby('StID')['y2kW']
gap = concat([gb.nunique(), gb.max() - gb.min()], axis=1)
gap.columns = ['nUnique', 'Dur']
ls_stids_without_gaps = gap.index[gap['nUnique'] == gap['Dur'] + 1].tolist()
df = df[df['StID'].isin(ls_stids_without_gaps)].copy()

df.StID.nunique()  # notebook display of the remaining station count; no effect in a script


# In[83]:


# remove those with very few obs: keep stations observed in at least 100 distinct weeks
nW = df.groupby('StID')['y2kW'].nunique()
ls_stid_to_keep = nW.index[nW >= 100].tolist()
df = df.loc[df['StID'].isin(ls_stid_to_keep)].copy()
df.StID.nunique()  # notebook display of the remaining station count; no effect in a script

# Output name is <marker>-agg-<fuel>.dta.  Use the `fuel` setting rather than
# a hard-coded 'e5' so the file name always matches the analysed price column.
fname = '-'.join([marker, 'agg', fuel]) + '.dta'
print(fname)


# NOTE(review): every other path in this file is rooted at P_GS_Data, while
# this one uses P_Data, which is not defined in this file -- confirm it is the
# intended output root.
f = P_Data / 'PCResponse' / fname
# df[df.StID<100].to_stata(f, write_index=False)
df.to_stata(f, write_index=False)